Close

% Conference paper record: "Fast Spatial-Temporal Transformer Network" (SIBGRAPI, 2021).
% A field name may occur only once per entry, so `url` holds the DOI resolver link
% and the urlib.net repository link is stored in the separate `ibi-url` field.
% Non-standard fields (affiliation, conference-*, ibi, ibi-url, targetfile,
% urlaccessdate) are ignored by standard styles and kept as archival metadata.
@InProceedings{EscherDrewBem:2021:FaSpTr,
               author = "Escher, Rafael Molossi and Drews-Jr, Paulo and Bem, Rodrigo
                         Andrade de",
          affiliation = "{Federal University of Rio Grande} and {Federal University of Rio
                         Grande} and {Federal University of Rio Grande}",
                title = "Fast Spatial-Temporal Transformer Network",
            booktitle = "Proceedings of the 34th Conference on Graphics, Patterns and
                         Images ({SIBGRAPI})",
                 year = "2021",
               editor = "Paiva, Afonso and Menotti, David and Baranoski, Gladimir V. G. and
                         Proen{\c{c}}a, Hugo Pedro and Junior, Antonio Lopes Apolinario
                         and Papa, Jo{\~a}o Paulo and Pagliosa, Paulo and dos Santos,
                         Thiago Oliveira and e S{\'a}, Asla Medeiros and da Silveira,
                         Thiago Lopes Trugillo and Brazil, Emilio Vital and Ponti, Moacir
                         A. and Fernandes, Leandro A. F. and Avila, Sandra",
         organization = "Conference on Graphics, Patterns and Images, 34. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "Deep Learning, Video Inpainting, Reformer Networks, Transformer
                         Networks",
             abstract = "In computer vision, the restoration of missing regions in an image
                         can be tackled with image inpainting techniques. Neural networks
                         that perform inpainting in videos require the extraction of
                         information from neighboring frames to obtain a temporally
                         coherent result. The state-of-the-art methods for video inpainting
                         are mainly based on Transformer Networks, which rely on attention
                         mechanisms to handle temporal input data. However, such networks
                         are highly costly, requiring considerable computational power for
                         training and testing, which hinders its use on modest computing
                         platforms. In this context, our goal is to reduce the
                         computational complexity of state-of-the-art video inpainting
                         methods, improving performance and facilitating its use in low-end
                         GPUs. Therefore, we introduce the Fast Spatio-Temporal Transformer
                         Network (FastSTTN), an extension of the Spatio-Temporal
                         Transformer Network (STTN) in which the adoption of Reversible
                         Layers reduces memory usage up to 7 times and execution time by
                         approximately 2.2 times, while maintaining state-of-the-art video
                         inpainting accuracy.",
  conference-location = "Gramado, RS, Brazil (virtual)",
      conference-year = "18-22 Oct. 2021",
                  doi = "10.1109/SIBGRAPI54419.2021.00018",
                  url = "http://dx.doi.org/10.1109/SIBGRAPI54419.2021.00018",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/45CUSQ5",
              ibi-url = "http://urlib.net/ibi/8JMKD3MGPEW34M/45CUSQ5",
           targetfile = "FastSTTN___SIBGRAPI_2021.pdf",
        urlaccessdate = "2024, May 06"
}


Close